This analysis uses air quality data collected at outdoor monitors across the United States, Puerto Rico, and the U.S. Virgin Islands. The data come primarily from the AQS (Air Quality System) database, from which we chose the ozone and SO2 measurements.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggmap)
# Read the ozone measurements from the zipped CSV file.
df <- read_csv(file = "Ozone.zip")
## Parsed with column specification:
## cols(
## .default = col_character(),
## `Site Num` = col_integer(),
## `Parameter Code` = col_integer(),
## POC = col_integer(),
## Latitude = col_double(),
## Longitude = col_double(),
## `Date Local` = col_date(format = ""),
## `Time Local` = col_time(format = ""),
## `Date GMT` = col_date(format = ""),
## `Time GMT` = col_time(format = ""),
## `Sample Measurement` = col_double(),
## MDL = col_double(),
## `Date of Last Change` = col_date(format = "")
## )
## See spec(...) for full column specifications.
# Read the SO2 measurements from the zipped CSV file.
df2 <- read_csv(file = "SO2.zip")
## Parsed with column specification:
## cols(
## .default = col_character(),
## `Parameter Code` = col_integer(),
## POC = col_integer(),
## Latitude = col_double(),
## Longitude = col_double(),
## `Date Local` = col_date(format = ""),
## `Time Local` = col_time(format = ""),
## `Date GMT` = col_date(format = ""),
## `Time GMT` = col_time(format = ""),
## `Sample Measurement` = col_double(),
## MDL = col_double(),
## `Method Code` = col_integer(),
## `Date of Last Change` = col_date(format = "")
## )
## See spec(...) for full column specifications.
# Preview the first six rows of the ozone data.
head(df, n = 6L)
## # A tibble: 6 × 24
## `State Code` `County Code` `Site Num` `Parameter Code` POC Latitude
## <chr> <chr> <int> <int> <int> <dbl>
## 1 01 049 9991 44201 1 34.289
## 2 01 049 9991 44201 1 34.289
## 3 01 049 9991 44201 1 34.289
## 4 01 049 9991 44201 1 34.289
## 5 01 049 9991 44201 1 34.289
## 6 01 049 9991 44201 1 34.289
## # ... with 18 more variables: Longitude <dbl>, Datum <chr>, `Parameter
## # Name` <chr>, `Date Local` <date>, `Time Local` <time>, `Date
## # GMT` <date>, `Time GMT` <time>, `Sample Measurement` <dbl>, `Units of
## # Measure` <chr>, MDL <dbl>, Uncertainty <chr>, Qualifier <chr>, `Method
## # Type` <chr>, `Method Code` <chr>, `Method Name` <chr>, `State
## # Name` <chr>, `County Name` <chr>, `Date of Last Change` <date>
# Preview the first six rows of the SO2 data.
head(df2, n = 6L)
## # A tibble: 6 × 24
## `State Code` `County Code` `Site Num` `Parameter Code` POC Latitude
## <chr> <chr> <chr> <int> <int> <dbl>
## 1 01 073 0023 42401 2 33.55306
## 2 01 073 0023 42401 2 33.55306
## 3 01 073 0023 42401 2 33.55306
## 4 01 073 0023 42401 2 33.55306
## 5 01 073 0023 42401 2 33.55306
## 6 01 073 0023 42401 2 33.55306
## # ... with 18 more variables: Longitude <dbl>, Datum <chr>, `Parameter
## # Name` <chr>, `Date Local` <date>, `Time Local` <time>, `Date
## # GMT` <date>, `Time GMT` <time>, `Sample Measurement` <dbl>, `Units of
## # Measure` <chr>, MDL <dbl>, Uncertainty <chr>, Qualifier <chr>, `Method
## # Type` <chr>, `Method Code` <int>, `Method Name` <chr>, `State
## # Name` <chr>, `County Name` <chr>, `Date of Last Change` <date>
# Overview of the methods used: number of ozone records per method.
summarise(group_by(df, `Method Name`), n())
## # A tibble: 6 × 2
## `Method Name` `n()`
## <chr> <int>
## 1 Instrumental - Chemiluminescence API Model 265E and T265 3505
## 2 Instrumental - Ecotech Serinus 10 2910
## 3 INSTRUMENTAL - ULTRA VIOLET 808729
## 4 INSTRUMENTAL - ULTRA VIOLET ABSORPTION 845830
## 5 INSTRUMENTAL - ULTRAVIOLET RADIATION ABSORBTN 8189
## 6 Instrumental - UV absorption photometry/UV 2B model 202 and 205 8562
# Check whether the measurements differ between methods by comparing the
# mean ozone reading recorded with each method.
summarise(
  group_by(df, `Method Name`),
  mean_measure = mean(`Sample Measurement`)
)
## # A tibble: 6 × 2
## `Method Name`
## <chr>
## 1 Instrumental - Chemiluminescence API Model 265E and T265
## 2 Instrumental - Ecotech Serinus 10
## 3 INSTRUMENTAL - ULTRA VIOLET
## 4 INSTRUMENTAL - ULTRA VIOLET ABSORPTION
## 5 INSTRUMENTAL - ULTRAVIOLET RADIATION ABSORBTN
## 6 Instrumental - UV absorption photometry/UV 2B model 202 and 205
## # ... with 1 more variables: mean_measure <dbl>
# Distribution of the ozone measurements for each method.
ggplot(data = df) +
  geom_boxplot(mapping = aes(y = `Sample Measurement`, x = `Method Name`))
The sample measurements collected with the different methods show little difference.
# Add the local hour of day to each data set.
# `Time Local` is read as a time-of-day value (col_time), which is stored in
# seconds. Dividing it directly keeps the "secs" units attached to numbers
# that are really hours, so it is converted to a plain number first.
Ozone <- df %>%
  mutate(Time_in_Hour = as.numeric(`Time Local`) / 3600)
SO2 <- df2 %>%
  mutate(Time_in_Hour1 = as.numeric(`Time Local`) / 3600)
# Boxplots of the ozone measurements by local hour of day, one panel per
# method.
# A trailing geom_point() call used to follow this plot. It was not joined to
# the plot with `+` and referred to misspelled columns (Time_in_hour,
# `Sample measurement`), so it only printed an unused layer description and
# never drew anything. It has been removed; the boxplots already show
# outlying points.
ggplot(Ozone) +
  geom_boxplot(
    mapping = aes(x = factor(Time_in_Hour), y = `Sample Measurement`)
  ) +
  facet_wrap(~`Method Name`)
# Boxplots of the SO2 measurements by local hour of day, one panel per
# method.
# A trailing geom_point() call used to follow this plot. It was not joined to
# the plot with `+` and referred to columns that do not exist in SO2
# (Time_in_hour, `Sample measurement`), so it only printed an unused layer
# description and never drew anything. It has been removed; the boxplots
# already show outlying points.
ggplot(SO2) +
  geom_boxplot(
    mapping = aes(x = factor(Time_in_Hour1), y = `Sample Measurement`)
  ) +
  facet_wrap(~`Method Name`)
The plots show that ozone concentrations are generally higher from late morning through the afternoon (roughly 10:00-17:00).
# Download a hybrid Google base map for the United States at zoom level 4.
map <- get_map(
  location = "the United States of America",
  zoom = 4,
  maptype = "hybrid",
  source = "google",
  color = "color"
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=the+United+States+of+America&zoom=4&size=640x640&scale=2&maptype=hybrid&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=the%20United%20States%20of%20America&sensor=false
# One row per distinct ozone monitor location, carrying the mean ozone
# measurement of the state it belongs to.
# na.rm = TRUE keeps a single missing reading from turning a whole state's
# mean into NA, and ungroup() stops the state grouping from leaking into
# later operations on Df_New.
Df_New <- df %>%
  group_by(`State Name`) %>%
  mutate(mean_measure = mean(`Sample Measurement`, na.rm = TRUE)) %>%
  ungroup() %>%
  select(mean_measure, `State Name`, Longitude, Latitude) %>%
  distinct()
# Draw the ozone monitor locations on the base map, coloured by the state
# mean measurement.
ggmap(map) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, colour = mean_measure),
    data = Df_New,
    alpha = 0.5,
    size = 3
  )
# One row per distinct SO2 monitor location, carrying the mean SO2
# measurement of the state it belongs to.
# na.rm = TRUE keeps a single missing reading from turning a whole state's
# mean into NA, and ungroup() stops the state grouping from leaking into
# later operations on Df2_New.
Df2_New <- df2 %>%
  group_by(`State Name`) %>%
  mutate(mean_measure = mean(`Sample Measurement`, na.rm = TRUE)) %>%
  ungroup() %>%
  select(mean_measure, `State Name`, Longitude, Latitude) %>%
  distinct()
# Draw the SO2 monitor locations on the base map, coloured by the state
# mean measurement.
ggmap(map) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, colour = mean_measure),
    data = Df2_New,
    alpha = 0.5,
    size = 3
  )
The maps show that ozone levels are usually higher along the east and west coasts of the United States. Places covered with vegetation also appear to have more concentrated and more widespread ozone, especially around the lake and coastal areas. The maps also show that high SO2 concentrations are found mainly in the northeastern and southwestern parts of the United States.
# Compare ozone and SO2 levels where both pollutants were monitored.
# The two files hold different monitors and readings, so their raw
# `Sample Measurement` columns cannot be paired directly. Merging them
# without keys matched rows whose measured values happened to be equal.
# Each pollutant is instead averaged per county and day, and the two
# summaries are joined on those keys so each point pairs an ozone mean with
# the SO2 mean for the same county and date.
OzoneAmount <- df %>%
  group_by(`State Name`, `County Name`, `Date Local`) %>%
  summarise(Ozone = mean(`Sample Measurement`, na.rm = TRUE)) %>%
  ungroup()
SO2Amount <- df2 %>%
  group_by(`State Name`, `County Name`, `Date Local`) %>%
  summarise(SO2 = mean(`Sample Measurement`, na.rm = TRUE)) %>%
  ungroup()
df3 <- inner_join(
  OzoneAmount,
  SO2Amount,
  by = c("State Name", "County Name", "Date Local")
)
# The layer must be added with `+`; otherwise ggplot() draws an empty plot
# and geom_point() is evaluated on its own.
ggplot(df3) +
  geom_point(mapping = aes(x = Ozone, y = SO2), alpha = 0.3)